For your handwritten solutions, scan them or take pictures of them.
For your code, only .ipynb file will be graded.
Please compress all the files to make a single .zip file
Do not submit a printed version of your code. It will not be graded.
An autoencoder (AE) is a neural network model that learns dimension reduction. The features compressed by the encoder are tuned for reconstruction rather than for solving a downstream task. Therefore, when we reconstruct data from a feature on the latent space, we can find unwanted entanglement that hinders distinguishing one class from another. However, this entanglement also gives us an advantage, as follows. Discrete inputs produce discrete features on the latent space, and those discrete features generate discrete outputs as well. But thanks to the entanglement, we can generate data from unseen points on the latent space, so the decoder becomes a model that generates data from a continuous distribution over the latent space. Thus, data generation is one of the main purposes of AEs, and there are famous generative variants such as the variational autoencoder and the adversarial autoencoder.
Conditional autoencoder is one of the modified AEs that focus on data generation. It determines which class to generate in advance so that it can generate data of a specific class (or digit).
You are asked to build and test the CAE through the following cells:
(0) Import modules
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
%matplotlib inline
(1) Load MNIST data
# your code here
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
train_imgs =
train_labels =
test_imgs =
test_labels =
n_train =
n_test =
print ("The number of training images : {}, shape : {}".format(n_train, train_imgs.shape))
print ("The number of testing images : {}, shape : {}".format(n_test, test_imgs.shape))
(2) Define the CAE (Conditional Autoencoder) structure
# your code here
n_input =
n_encoder1 =
n_encoder2 =
n_latent = 2
n_decoder2 =
n_decoder1 =
n_label = 10
(3) Construct the CAE model. CAE needs label data unlike AE.
tf.concat([input, label]). (see the above figure)x = tf.placeholder(tf.float32, [None, n_input]) # image
y = tf.placeholder(tf.float32, [None, n_label]) # label
# your code here
weights = {
}
biases = {
}
def batch_norm_flat(batch_flat) :
epsilon = 1e-5
beta = tf.Variable(tf.constant(0.0, shape=[1]), trainable=True)
gamma = tf.Variable(tf.constant(1.0, shape=[1]), trainable=True)
mean, variance = tf.nn.moments(batch_flat, axes=[0])
return norm_batch
def encoder(x, weights, biases, label):
latent = batch_norm_flat(latent)
return latent
def decoder(latent, weights, biases, label):
return reconst
(4) Initialize the model & optimizer.
latent = encoder(x, weights, biases, y)
reconst = decoder(latent, weights, biases, y)
# your code here
loss =
LR =
optm =
(5) Train the model and plot the loss
n_batch = 50
n_iter = 5000
n_prt = 250
def train_batch_maker(batch_size):
random_idx = np.random.randint(n_train, size = batch_size)
return train_imgs[random_idx], train_labels[random_idx]
def test_batch_maker(batch_size):
random_idx = np.random.randint(n_test, size = batch_size)
return test_imgs[random_idx], test_labels[random_idx]
sess = tf.Session()
# your code here
plt.figure(figsize=(10,8))
plt.plot(np.arange(len(loss_record_train))*n_prt, loss_record_train, label = 'training')
plt.plot(np.arange(len(loss_record_test))*n_prt, loss_record_test, label = 'testing')
plt.xlabel('iteration', fontsize = 15)
plt.ylabel('loss', fontsize = 15)
plt.legend(fontsize = 12)
plt.ylim([0,np.max(loss_record_train)])
plt.show()
(6) Reconstruct 10 random images and compare them with their originals.
# your code here
(7) Plot the latent space.
test_x, test_y = test_batch_maker(500)
# your code here
(8) Plot the latent space of each digit to see the each distribution.
# your code here
(9) Generate images of each digit by feeding a new point and different label conditions to the decoder. Discuss whether those are in a similar style.
new_x, new_y = np.random.uniform(xmin, xmax), np.random.uniform(ymin, ymax)
new_data = # your code here
print('new_data:', new_data)
fig = plt.figure(figsize = (10,10))
for i in range(10):
plt.scatter(test_latent[test_y_arg == i,0], test_latent[test_y_arg == i,1],
label = str(i), marker=i)
plt.axvline(x=new_data[0,0], c='r', alpha=0.2)
plt.axhline(y=new_data[0,1], c='r', alpha=0.2)
plt.scatter(new_data[0,0], new_data[0,1], marker='x', c='k', s = 100, label = 'new data')
plt.title('Latent Space', fontsize=15)
plt.xlabel('Z1', fontsize=15)
plt.ylabel('Z2', fontsize=15)
plt.legend(fontsize = 15)
plt.xlim([xmin, xmax]); plt.ylim([ymin, ymax])
plt.show()
latent_input = tf.placeholder(tf.float32, [None, n_latent])
reconst = decoder(latent_input, weights, biases, y)
plt.figure(figsize=(18, 8))
for i in range(10):
feed = {latent_input: new_data, y: np.eye(10, dtype=np.float32)[i:i+1]}
fake_image = sess.run(reconst, feed_dict = feed)
plt.subplot(2, 5, i+1)
plt.imshow(fake_image.reshape(28,28), 'gray')
plt.xticks([])
plt.yticks([])
plt.tight_layout()
plt.show()
(10) Draw the manifold image with latent features for all digits and discuss the result based on what you found in (9).
nx = 20
ny = 20
x_values = np.linspace(xmin, xmax, nx)
y_values = np.linspace(ymin, ymax, ny)
canvas = np.empty((28*ny, 28*nx))
for k in range(10):
plt.figure(figsize = (16, 8))
plt.subplot(1,2,1)
plt.scatter(test_latent[test_y_arg == k,0], test_latent[test_y_arg == k,1],
label = str(k), marker=k, color=(plt.rcParams['axes.prop_cycle'].by_key()['color'])[k])
plt.title('Latent Space', fontsize = 15)
plt.xlabel('Z1', fontsize = 15)
plt.ylabel('Z2', fontsize = 15)
plt.legend(fontsize = 12)
plt.xlim([-6, 6]); plt.ylim([-6, 6])
plt.subplot(1,2,2)
# your code here
plt.imshow(canvas, 'gray')
plt.title('Manifold', fontsize = 15)
plt.xlabel('Z1', fontsize = 15)
plt.ylabel('Z2', fontsize = 15)
plt.xticks([])
plt.yticks([])
plt.tight_layout()
plt.show()